%reload_ext autoreload
%autoreload 2

import numpy as np

import sys

from helper import nn
from helper import logistic_regression as lr

from sklearn.metrics import classification_report

# Load the feature matrix and the label vector from the MATLAB data file via
# the project helper. The notebook output recorded for this cell shows raw_X
# with shape (5000, 400); raw_y presumably holds one label per row of raw_X
# (values 1..10, per the comments further down) -- confirm against nn.load_data.
raw_X, raw_y = nn.load_data('ex3data1.mat')

(5000, 400)

prepare data

# Prepend a constant column of ones (the intercept term x0) to the features.
# The scalar 1 is broadcast down the new column, so X gains exactly one column.
X = np.insert(raw_X, 0, 1, axis=1)

(5000, 401)

# raw_y holds 10 categories labelled 1..10; digit 0 is stored as label 10
# because MATLAB indices start at 1.
# Build one binary (one-vs-all) label vector per digit, ordered so that row d
# belongs to digit d: label 10 (digit 0) comes first, followed by labels 1..9.
y_matrix = [(raw_y == label).astype(int) for label in [10, *range(1, 10)]]
y = np.array(y_matrix)


(10, 5000)

train 1 model

# Fit a single binary classifier on row 0 of y (digit 0 versus everything else).
t0 = lr.logistic_regression(X, y[0])

# Score it on the same data it was trained on: the accuracy is the fraction of
# samples whose prediction equals the binary label.
y_pred = lr.predict(X, t0)
accuracy = np.mean(y[0] == y_pred)
print('Accuracy={}'.format(accuracy))


train k model

# Train one one-vs-all classifier per row of y (one row per digit) and stack
# the resulting parameter vectors row-wise, in digit order.
k_theta = np.array([lr.logistic_regression(X, digit_labels) for digit_labels in y])

(10, 401)

making prediction

  • Recall the shape of k_theta: it is (10, 401), one row of parameters per class, so the scores for all 10 classes are computed at once as $X\times\theta^T$

    $(5000, 401) \times (10, 401).T = (5000, 10)$

  • Then apply the sigmoid to turn the scores into probabilities; for each row (one sample), the column with the highest probability is the predicted digit

# Per-class probabilities for every sample:
# (5000, 401) @ (401, 10) -> (5000, 10), one column per digit.
prob_matrix = lr.sigmoid(X @ k_theta.T)

# The predicted digit is the column index with the highest probability per row.
y_pred = np.argmax(prob_matrix, axis=1)

# Put the ground truth on the same 0..9 scale: raw label 10 stands for digit 0.
y_answer = np.where(raw_y == 10, 0, raw_y)

print(classification_report(y_answer, y_pred))

             precision    recall  f1-score   support

          0       0.97      0.99      0.98       500
          1       0.95      0.99      0.97       500
          2       0.95      0.92      0.93       500
          3       0.95      0.91      0.93       500
          4       0.95      0.95      0.95       500
          5       0.92      0.92      0.92       500
          6       0.97      0.98      0.97       500
          7       0.95      0.95      0.95       500
          8       0.93      0.92      0.92       500
          9       0.92      0.92      0.92       500

avg / total       0.94      0.94      0.94      5000

